import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.io as pio

# Use plotly's 'notebook' renderer for every fig.show() call.
pio.renderers.default = 'notebook'

# Load the raw dataset.
# NOTE(review): this is an absolute, machine-specific path; point it at your
# own copy of the CSV before running.
df = pd.read_csv("C:\\Users\\gdeep\\Downloads\\Unemployment in India.csv")

# The bare expressions in this script only display something when run as
# notebook cells; they are kept so the exploratory output is unchanged.
df.head()
df.shape
df.columns

# Normalise the headers: trim surrounding whitespace, replace spaces with
# underscores and lower-case, so columns can be addressed consistently.
df.columns = df.columns.str.strip().str.replace(" ", "_").str.lower()
df.columns
df.info()
df.describe()

# Parse dates as day-first. The flag must be the boolean True: the original
# passed the string 'True', which only worked because any non-empty string
# (including 'False') is truthy.
df['date'] = pd.to_datetime(df['date'], dayfirst=True)
df['date'].dtypes
df.isnull().sum()
# There are 28 missing values in the dataset
# Drop every row that contains at least one missing value, then re-check the
# shape and count fully duplicated rows.
df = df.dropna()
df.shape
df.duplicated().sum()
# There are no duplicate values in the dataset
import calendar

# Imported here, ahead of its first use: the original called px.bar() before
# this import had run, which raises NameError on a fresh kernel.
import plotly.express as px

# --- Feature engineering -----------------------------------------------------
# Derive year and month from the parsed date column.
df['year'] = df['date'].dt.year
df.head()
df['month'] = df['date'].dt.month
df.head()
# Replace the month number with its abbreviated name from calendar.month_abbr.
df['month'] = df['month'].apply(lambda x: calendar.month_abbr[x])
df.head()
df.dtypes

# --- Univariate - Statistical Non Visual Analysis ---------------------------
# Split columns by dtype: object/int32 columns are treated as discrete,
# float64 columns as numerical.
discrete_df = df.select_dtypes(include=["object", "int32"])
numerical_df = df.select_dtypes(include=["float64"])


def discrete_univariate_analysis(discrete_data):
    """Print count, nunique, unique values and value counts for each column.

    Args:
        discrete_data: DataFrame whose columns are summarised one by one.
    """
    for col_name in discrete_data:
        print("*"*10, col_name, "*"*10)
        print(discrete_data[col_name].agg(['count', 'nunique', 'unique']))
        print('Value Counts: \n', discrete_data[col_name].value_counts())
        print()


discrete_univariate_analysis(discrete_df)


def numerical_univariate_analysis(numerical_data):
    """Print min, max, mean, median and std for each column.

    Args:
        numerical_data: DataFrame whose columns are summarised one by one.
    """
    for col in numerical_data:
        print("*"*10, col, "*"*10)
        print(numerical_data[col].agg(['min', 'max', 'mean', 'median', 'std']))
        print()


numerical_univariate_analysis(numerical_df)

# --- Univariate - visual analysis --------------------------------------------
# Each seaborn plot is followed by plt.show() so consecutive plots do not draw
# onto the same axes when this runs as one script instead of separate cells.
for hist_col in ("estimated_unemployment_rate_(%)",
                 "estimated_employed",
                 "estimated_labour_participation_rate_(%)"):
    sns.histplot(data=df, x=hist_col, hue='area', kde=True)
    plt.show()

fig = px.bar(df, y='region', color='region')
fig.show()

for count_col in ('year', 'month', 'area'):
    sns.countplot(data=df, x=count_col)
    plt.show()

# --- Correlation between the numerical columns -------------------------------
plt.figure(figsize=(4, 4))
plt.title("Correlation Matrix Heatmap")
sns.heatmap(numerical_df.corr(), annot=True, linewidths=0.5)
plt.show()

# --- Distribution of each metric by region -----------------------------------
for box_col in ('estimated_labour_participation_rate_(%)',
                'estimated_unemployment_rate_(%)',
                'estimated_employed'):
    fig = px.box(df, x='region', y=box_col, color='region',
                 title=f"{box_col} by region")
    fig.show()

# Same distributions, split into one facet per year.
for box_col in ('estimated_employed',
                'estimated_unemployment_rate_(%)',
                'estimated_labour_participation_rate_(%)'):
    fig = px.box(df, x='region', y=box_col, facet_col='year', color='region',
                 title=f"{box_col} by region")
    fig.show()

# --- Average unemployment rate per region and year ---------------------------
# groupby(...)[[col]].mean() already returns a DataFrame, so it is not wrapped
# in pd.DataFrame again.
state = (
    df.groupby(['region', 'year'])[['estimated_unemployment_rate_(%)']]
    .mean()
    .reset_index()
)
fig = px.bar(state, x='region', y='estimated_unemployment_rate_(%)',
             color='region', facet_col='year',
             title='average unemployment rate before corona 2019 and after corona 2020 state wise',
             template='ggplot2')
fig.update_xaxes(categoryorder='category ascending')
fig.show()

# --- Change in unemployment rate, 2019 vs 2020 -------------------------------
before = df[df['year'] == 2019]
after = df[df['year'] == 2020]
before_corona = before.groupby('region')['estimated_unemployment_rate_(%)'].mean().reset_index()
after_corona = after.groupby('region')['estimated_unemployment_rate_(%)'].mean().reset_index()

# Join the two yearly averages on the region name. The original copied the
# 2020 column across by positional index, which silently pairs the wrong
# regions whenever the two years do not cover exactly the same regions.
# A left join keeps every 2019 region; a region with no 2020 data gets NaN.
unemployment_rate = before_corona.rename(
    columns={'estimated_unemployment_rate_(%)': 'unemployment_rate_before_corona'}
).merge(
    after_corona.rename(
        columns={'estimated_unemployment_rate_(%)': 'unemployment_rate_after_corona'}
    ),
    on='region',
    how='left',
)
unemployment_rate.head()

# Percentage change relative to the 2019 rate. The original expression lacked
# parentheses, so operator precedence reduced it to `after - before / before`,
# i.e. `after - 1`. It is multiplied by 100 to match the chart title.
unemployment_rate['rate_change_in_unemployment'] = round(
    (unemployment_rate['unemployment_rate_after_corona']
     - unemployment_rate['unemployment_rate_before_corona'])
    / unemployment_rate['unemployment_rate_before_corona'] * 100,
    2,
)
fig = px.bar(unemployment_rate, x='region', y='rate_change_in_unemployment',
             color='rate_change_in_unemployment',
             title='Percentage change in Unemployment rate in each state after corona',
             template='ggplot2')
fig.update_layout(xaxis={'categoryorder': 'total ascending'})
fig.show()

# --- Conclusions -------------------------------------------------------------
# After analysing the dataset, we can gain insights into how the corona crisis
# affected the unemployment rate in various states of India. The labour
# participation rate decreased during corona. The states most affected in
# unemployment rate due to corona are Haryana, Jharkhand, Tripura, Bihar and
# Puducherry, whereas Uttar Pradesh is the state with the most employees.
# NOTE(review): the list of most-affected states was written against the
# earlier rate-change formula; re-check it against the corrected chart.